*********************************************************************************
*** Match infant mortality sites with water source locations  
*** Create a data set that can be used to calculate least cost distance in ArcGIS 
***********************************************************************************

* Session setup. From here on every command (and every star comment) must be
* terminated with a semicolon.
#delimit ;

* Create new variables as double by default so coordinates and distances keep ;
* full precision. The option is spelled -permanently-, so the setting is also ;
* remembered in future Stata sessions. ;
* NOTE(review): this sits on its own line, after the delimiter switch, so it ;
* does not depend on how text following the delimiter directive on the same ;
* physical line is treated - confirm this matches the intended behavior. ;
set type double, permanently ;

* Start from an empty session: data, matrices and Mata objects. ;
clear ;
clear matrix ;
clear mata ;

* Capacity limits and interactive behavior used by the rest of the script. ;
set matsize 5000 ;
set maxvar 5000 ;
set more off ;
set rmsg on ;
pause on ;


*** set the globals path and pathdata here (every statement and comment must end with a semicolon) *** ;


*=====================================================================================; 
* Build the candidate IMR-site / water-source pairs used for the least cost distance calculation in ArcGIS ;

* Build candidate IMR-site / water-source pairs: for every IMR site keep the 3 ;
* nearest sources whose type is "地下水" and the 5 nearest sources of any other ;
* type, then write the pairs to a .dta file and an Excel sheet. ;
* All file paths are quoted so that path globals containing spaces still work. ;

tempfile gw_nearest ;

* Water sources: one row per source, with id and coordinates. ;
import excel "$path\Watersource_coordinates_unique.xlsx", sheet("Sheet1") firstrow clear ;
label var id "water id" ;
ren id water_id ;
ren longitude longitude1 ;
ren latitude  latitude1 ;

* Form every (water source, IMR site) combination. ;
cross using "$path\IMR_sites.dta" ;
ren longitude longitude2 ;
ren latitude  latitude2 ;
label var ID "IMR id" ;
ren ID imr_id ;

* Straight-line distance for each pair (geodist is a user-written command). ;
geodist latitude1 longitude1 latitude2 longitude2, gen(dist) ;
label var dist "IMR sites to water supply source (underground water included)" ;

* Sources of type "地下水": the 3 nearest per IMR site. ;
* The (dist) in the by-prefix orders rows by distance within each site, so ;
* the selection does not rely on an earlier sort still being in effect. ;
preserve ;
keep if type=="地下水" ;
bysort imr_id (dist): keep if _n<=3 ;
save `gw_nearest' ;
restore ;

* All other sources: the 5 nearest per IMR site. ;
drop if type=="地下水" ;
bysort imr_id (dist): keep if _n<=5 ;

* Recombine both sets and order the pairs by site and distance. ;
append using `gw_nearest' ;
sort imr_id dist ;

* NOTE(review): the .dta goes to the pathdata folder while the Excel copy goes ;
* to the path folder, exactly as before - confirm this split is intended. ;
save "$pathdata\cost_IMR_water_match.dta", replace ;
export excel using "$path\cost_IMR_water_match.xlsx", firstrow(variables) replace ;

*=======================================================;  


*** Calculate the distance between IMR sites and water supply sources ; 

* For every IMR site (identified by code) compute: ;
*   dist       - distance to the nearest water source of any type ;
*   N#         - number of sources within # distance units ;
*   dist_alt   - distance to the nearest source whose type is not "地下水" ;
*   N#_alt     - number of such sources within # distance units ;
* and keep one row per site. The labels describe the thresholds as KM ;
* NOTE(review): this assumes geodist returns kilometers - confirm. ;

use "$path\IMR_sites.dta", clear ;

ren longitude longitude0 ;
ren latitude  latitude0 ;

* Pair every IMR site with every water source and measure each pair. ;
cross using "$pathdata\watersource_simple.dta" ;
geodist latitude0 longitude0 latitude longitude, gen(pair_dist) ;

* Nearest source of any type. ;
bysort code: egen double dist = min(pair_dist) ;
label var dist "distance from IMR sites to cloest water supply source including underground water" ;

* Counts within each radius. A missing distance compares as larger than any ;
* number, so pairs without a distance are never counted. ;
foreach L in 25 50 75 100 125 150 175 200 { ;
	bysort code: egen N`L' = total(pair_dist <= `L') ;
	label var N`L' "Number of water supply sources within `L' KM" ;
} ;

* Same measures with sources of type "地下水" excluded: their pair distance ;
* is set to missing, so they drop out of both the minimum and the counts. ;
gen double pair_dist_alt = pair_dist if type != "地下水" ;
bysort code: egen double dist_alt = min(pair_dist_alt) ;

foreach L in 25 50 75 100 125 150 175 200 { ;
	bysort code: egen N`L'_alt = total(pair_dist_alt <= `L') ;
	label var N`L'_alt "Number of water supply sources within `L' KM excluding underground water" ;
} ;

label var dist_alt "distance from IMR sites to cloest water supply source excluding underground water" ;

* Keep one row per site: the row of the nearest source. Ordering by the ;
* pairwise distance is required here because dist is constant within a site, ;
* so sorting on dist would leave the choice of row (and therefore the water ;
* source columns carried along with it) arbitrary. ;
bysort code (pair_dist): keep if _n==1 ;
drop pair_dist pair_dist_alt ;

sort code dist ;

save "$pathdata\IMR_water_source_match.dta", replace ;



















